/*
 * CondorGPBSDispatcher.java
 *
 * Created on June 8, 2004, 11:17 AM
 *
 * This file is part of the STAR Scheduler.
 * Copyright (c) 2002-2003 STAR Collaboration - Brookhaven National Laboratory
 *
 * STAR Scheduler is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * STAR Scheduler is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with STAR Scheduler; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */
023    package gov.bnl.star.offline.scheduler.condorg;
024    
025    import gov.bnl.star.offline.scheduler.*;
026    import gov.bnl.star.offline.scheduler.Dispatchers.lsf.CSHApplication;
027    import gov.bnl.star.offline.scheduler.Dispatchers.lsf.LSFDispatcher;
028    import gov.bnl.star.offline.scheduler.util.CSHCommandLineTask;
029    import gov.bnl.star.offline.scheduler.util.FilesystemToolkit;
030    import gov.bnl.star.offline.scheduler.util.StatisticsRecorder;
031    
032    import java.io.File;
033    import java.io.FileOutputStream;
034    import java.io.PrintStream;
035    import java.util.*;
036    
037    import java.util.logging.Level;
038    import java.util.logging.Logger;
039    
040    
041    /** Dispatches jobs using Condor-G on a remote site that uses PBS. 
042     * It will NOT use extra rsl attributes for PBS.  If needed they will
043     * be added later. 
044     * @author Alex Withers
045     * @version 1.0 2004/06/08
046     */
047    public class CondorGPBSDispatcher extends LSFDispatcher {
048        static private Logger log = Logger.getLogger(CondorGPBSDispatcher.class.getName());
049    
050        private static String condorEx;
051        protected CSHApplication application;
052    
053        public void setCondorEx(String condorEx) {
054            this.condorEx = condorEx;
055        }
056        
057        public String getCondorEx() {
058            return condorEx;
059        }
060    
061        /** Creates a new dispatcher */
062        public CondorGPBSDispatcher() {
063        }
064    
065        /** Creates the scripts and dispatches the job on the target machine.
066         * @param request the job request
067         */
068        public void dispatch(Request request, List jobs) {
069            log.info("Dispatching using Condor-g and LSF: \"" + request.getCommand() +
070                "\"");
071    
072            // Enables the simulation mode if necessary
073            useSimulationMode(request.getSimulation());
074            reportedFailure = false;
075    
076            // Submits from the higher to the lower JobID. This way the
077            // user has a feel of  when the last job is going to be
078            // submitted
079            for (int nProcess = jobs.size() - 1; nProcess >= 0;
080                    nProcess--) {
081                Job job = (Job) jobs.get(nProcess);
082    
083                System.out.print("Dispatching process " +
084                    job.getJobID() + ".");
085                dispatch(request, job);
086            }
087    
088            StatisticsRecorder.getIntance().recordStatistics(request, jobs);
089        }
090    
091        protected void dispatch(Request request, Job job) {
092            application = (CSHApplication) ComponentLibrary.getInstance().getComponent("CSHApplication");
093    
094            // TODO: all the parameters should be passed in one go
095            application.setJob(request, job);
096            application.setScratchDir(scratchDir);
097            application.setSubmissionCommand(getCondorGCommand(request, job));
098    
099            application.prepareJob();
100            prepareClassAd(request, job);
101    
102            log.info("Executing \"" + getCondorGCommand(request, job) + "\"");
103    
104            if (!simulation) {
105                try {
106                    Thread.sleep(getMsBtwnSuccess());
107                } catch (Exception e) {
108                }
109    
110                int attempt = 0;
111                boolean success = false;
112    
113                while (!success && (attempt < getMaxAttempts())) {
114                    try {
115                        CSHCommandLineTask task = new CSHCommandLineTask(getCondorGCommand(
116                                    request, job), true, 30000);
117                        task.execute();
118    
119                        if (task.getExitStatus() != 0) {
120                            log.warning("bsub failed: " + task.getOutput());
121                            Thread.sleep(getMsBtwnFailure());
122                            System.out.print("/");
123                            attempt++;
124                        } else {
125                            success = true;
126                        }
127                    } catch (Exception e) {
128                        log.log(Level.SEVERE,
129                            "Couldn't submit the script to Condor-g", e);
130    
131                        try {
132                            Thread.sleep(getMsBtwnFailure());
133                        } catch (Exception e1) {
134                        }
135    
136                        System.out.print("/");
137                        attempt++;
138                    }
139                }
140    
141                if (success) {
142                    System.out.println(" done.");
143                } else {
144                    System.out.println(" FAILED!!");
145                }
146            } else {
147                System.out.println(" simulated.");
148            }
149        }
150    
151        /** Returns the command line to submit the job through condor-g.
152         * @param request the request that originated the job
153         * @param job the job to be dispatched
154         * @return the commandline to submit the job
155         */
156        protected String getCondorGCommand(Request request, Job job) {
157            return condorEx + " " + getClassAdName(request, job);
158        }
159    
160        /** Returns the name of the file containing the class ad. Class ad is the job
161         * description required by condor to submit a job.
162         * @param request the request that originated the job
163         * @param job the job to be submitted
164         * @return the file name of the class ad
165         */
166        protected String getClassAdName(Request request, Job job) {
167            return "sched" + job.getJobID() + ".condorg";
168        }
169    
170        private void prepareClassAd(Request request, Job job) {
171            try {
172                PrintStream classAd = new PrintStream(new FileOutputStream(
173                            new File(getClassAdName(request, job))));
174                createClassAd(request, job, classAd);
175            } catch (Exception e) {
176                log.log(Level.SEVERE, "Couldn't create the class ad", e);
177                throw new RuntimeException("Couldn't create the class ad " +
178                    getClassAdName(request, job) + ": " + e.getMessage());
179            }
180        }
181    
182        private void createClassAd(Request request, Job job,
183            PrintStream classAd) {
184            classAd.print("executable = ");
185            classAd.println(getExecutable());
186    
187            if (getArguments() != null) {
188                classAd.print("arguments = ");
189                classAd.println(getArguments());
190            }
191    
192            classAd.print("globusscheduler = ");
193            classAd.println(getGlobusScheduler());
194    
195            if (application.getStdin() != null) {
196                classAd.print("input = ");
197                classAd.println(application.getStdin());
198            }
199    
200            if (application.getStdout() != null) {
201                classAd.print("output = ");
202                classAd.println(application.getStdout());
203            }
204    
205            if (application.getStderr() != null) {
206                classAd.print("error = ");
207                classAd.println(application.getStderr());
208            }
209    
210            classAd.print("log = ");
211            classAd.println(getLogName(job));
212    
213            if (getRemoteDirectory() != null) {
214                classAd.print("remote_initialdir = ");
215                classAd.println(getRemoteDirectory());
216            }
217    
218            /* This is basically the main difference from
219             * CondorGLSFDispatcher.java.  No globus-rsl stuff.
220             * -- Alex Withers 
221             */
222            /*
223            classAd.print("globusrsl =");
224    
225            if (job.getTarget() != null) {
226                classAd.print(" (xlsfmachine = ");
227                classAd.print(job.getTarget());
228                classAd.print(")");
229            }
230    
231            if (application.getJobName() != null) {
232                classAd.print(" (xlsfjobname = ");
233                classAd.print(application.getJobName());
234                classAd.print(")");
235            }
236    
237            if (request.getMail()) {
238                classAd.print(" (xlsfmailreport = ");
239                classAd.print("false");
240                classAd.print(")");
241            } else {
242                classAd.print(" (xlsfmailreport = ");
243                classAd.print("true");
244                classAd.print(")");
245            }
246    
247            if (getResourceUsageSwitch(job) != null) {
248                classAd.print(" (xlsfresources = ");
249                classAd.print(getResourceUsageSwitch(job));
250                classAd.print(")");
251            }
252    
253            if (job.getQueue() != null) {
254                classAd.print(" (queue = ");
255                classAd.print(job.getQueue());
256                classAd.print(")");
257            }
258    
259            classAd.println();
260            */
261    
262            if (isTransferExecutable()) {
263                classAd.println("transfer_executable = true");
264            } else {
265                classAd.println("transfer_executable = false");
266            }
267            classAd.println("notification = never");
268            classAd.println("universe = globus");
269            classAd.println("queue");
270        }
271    
272        private String getExecutable() {
273            if (application.getCommandLine().indexOf(' ') == -1) {
274                return application.getCommandLine();
275            }
276    
277            return application.getCommandLine().substring(0,
278                application.getCommandLine().indexOf(' '));
279        }
280    
281        private String getArguments() {
282            if (application.getCommandLine().indexOf(' ') == -1) {
283                return null;
284            }
285    
286            return application.getCommandLine().substring(application.getCommandLine()
287                                                                     .indexOf(' ') +
288                1);
289        }
290    
291        private String getLogName(Job job) {
292            // TODO maybe log filename should be put as a general property of Process (as stds)
293            return "sched" + job.getJobID() + ".condorg.log";
294        }
295    
296        private String getGlobusScheduler() {
297            //TODO make it flexible
298            return getGlobusGatekeeper();
299        }
300        
301        private String gatekeeper;
302        
303        /** Holds value of property transferExecutable. */
304        private boolean transferExecutable;
305        
306        public void setGlobusGatekeeper(String gatekeeper) {
307            this.gatekeeper = gatekeeper;
308        }
309        
310        public String getGlobusGatekeeper() {
311            return gatekeeper;
312        }
313    
314        private String remoteInitialDir;
315        
316        public void setRemoteInitialDir(String remoteInitialDir) {
317            this.remoteInitialDir = remoteInitialDir;
318        }
319        
320        public String getRemoteInitialDir() {
321            return remoteInitialDir;
322        }
323        
324        private String getRemoteDirectory() {
325            // TODO this has to be specified better: remote execution directory could be different from scheduler execution directory
326            if (".".equals(getRemoteInitialDir())) return FilesystemToolkit.getCurrentDirectory();
327            return getRemoteInitialDir();
328        }
329    
330        protected String getResourceUsageSwitch(Job job) {
331            String res = super.getResourceUsageSwitch(job);
332            if (res == null) return res;
333    
334            return res.replaceAll("\"", "\\\\\"");
335        }
336        
337        /** Getter for property transferExecutable.
338         * @return Value of property transferExecutable.
339         *
340         */
341        public boolean isTransferExecutable() {
342            return this.transferExecutable;
343        }
344        
345        /** Setter for property transferExecutable.
346         * @param transferExecutable New value of property transferExecutable.
347         *
348         */
349        public void setTransferExecutable(boolean transferExecutable) {
350            this.transferExecutable = transferExecutable;
351        }
352        
353    }